一、简述:¶
最近闲来无事,想研究一下极验滑块验证码的破解。直接破解js代码耗时又耗力,而且一旦版本更新,以前的所有努力可能都要推翻重做,通用性不够,所以最后还是选用selenium + PIL来实现滑块验证码的破解。期间也翻阅过很多文章,大多都已经失效,并且缺口位置查找和模拟滑动轨迹的成功率很低,很难应用到实际开发项目中。本文针对最新版本的极验滑块验证码进行破解。
二、项目环境¶
大致需要用到以下环境和模块,各位看官请提前准备好:python3.6、selenium、numpy、PIL、lxml、chromedriver
三、分析步骤以及代码编写¶
-
首先分析目标网站(本次主要以geetest官网滑块demo为参考)
1.png
网站大致长这个样子。首先按F12打开开发者工具,选择Elements查看节点,发现最新版本的滑块图片是使用画布(canvas)来呈现的。期间查阅了大量文档,最终使用如下代码获得画布中的图片数据,获取到的图片数据是经过base64编码的:
1 | document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png") |
2.png
2.通过分析发现这两个画布放的是所需要的背景图和缺口图(其实一眼就看出来的)
3.png
- 接下来就是代码的编写了 3.1 首先是获得背景图和缺口图的数据
def get_images(self):
    """
    Fetch both captcha canvases as base64 PNG data URLs.

    Waits for the ``geetest_canvas_slice`` element to appear, then reads the
    two canvases by running ``toDataURL("image/png")`` as JavaScript in the
    page.

    The original snippet stored the ``geetest_canvas_bg`` data in a variable
    named ``fullgb`` and the ``geetest_canvas_fullbg`` data in ``bg``, so the
    returned pair was the opposite of what the names said. The values are now
    returned in the order the names promise.

    :return: tuple ``(bg, fullbg)`` where ``bg`` is the data URL of the
        ``geetest_canvas_bg`` canvas and ``fullbg`` is the data URL of the
        ``geetest_canvas_fullbg`` canvas
    """
    time.sleep(1)
    self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')
    bg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_bg geetest_'
                                 'absolute")[0].toDataURL("image/png")')["value"]
    fullbg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
                                     ' geetest_absolute")[0].toDataURL("image/png")')["value"]
    return bg, fullbg
3.2 对数据进行解码操作并保存图片
def get_decode_image(self, filename, location_list):
    """
    Decode a base64 data URL into an image object.

    :param filename: not used by this method
    :param location_list: data URL string; it is split on ``","`` and must
        contain exactly one comma (header, then base64 payload)
    :return: the image opened from the decoded bytes
    """
    _header, payload = location_list.split(",")
    raw_bytes = base64.decodebytes(payload.encode())
    buffer = BytesIO(raw_bytes)
    new_im: image.Image = image.open(buffer)
    return new_im
3.3 接下来就是计算缺口位置了(这里使用的PIL中计算两张图片的差值获得缺口位置)
def compute_gap(self, img1, img2):
    """
    Locate the x coordinate of the gap by diffing the two captcha images.

    Both images are converted to RGB, their per-pixel difference is taken,
    converted to grayscale and binarized through the lookup table
    ``self.table``. Columns are then scanned left to right, starting at
    x = 43, and the first column holding more than 5 set pixels is returned.

    :param img1: first image
    :param img2: second image
    :return: x coordinate of the first matching column, or ``None`` when no
        column qualifies
    """
    img1 = img1.convert("RGB")
    img2 = img2.convert("RGB")
    diff = ImageChops.difference(img1, img2)
    diff = diff.convert("L")
    diff = diff.point(self.table, '1')

    # NOTE(review): presumably skips the area covered by the slider piece at
    # its starting position - confirm against the captcha layout.
    left = 43

    # Obtain the pixel accessor once instead of once per pixel.
    pixels = diff.load()
    width, height = diff.size
    for w in range(left, width):
        hits = 0
        for h in range(height):
            if pixels[w, h] == 1:
                hits += 1
                # Require more than 5 differing pixels in a column so that a
                # small bump on the gap outline does not trigger a match.
                if hits > 5:
                    return w
    return None
3.4 当滑块的缺口位置找到以后,就需要生成滑动轨迹(其中加20是为了保证滑动时先超过缺口位置,然后再慢慢回退到正确位置)
def ease_out_quart(self, x):
    """Quartic ease-out curve: returns ``1 - (1 - x) ** 4``."""
    remaining = 1 - x
    return 1 - pow(remaining, 4)


def get_tracks_2(self, distance, seconds, ease_func):
    """
    Build a slide track by sampling an easing function.

    The target is ``distance + 20`` so the slider first overshoots the gap;
    a fixed tail of small negative steps (summing to -20) then pulls it back.
    Reference: https://www.jianshu.com/p/3f968958af5a (the original author
    reports a success rate above 90% with this approach).

    :param distance: gap position
    :param seconds: duration; the curve is sampled every 0.1 over
        ``[0, seconds)``
    :param ease_func: easing function, called with values of ``t / seconds``
    :return: list of per-step x offsets
    """
    target = distance + 20

    # Absolute position after each sample, starting from 0.
    positions = [0]
    positions.extend(
        round(ease_func(t / seconds) * target)
        for t in np.arange(0.0, seconds, 0.1)
    )

    # Convert absolute positions into relative moves.
    tracks = [0]
    tracks.extend(cur - prev for prev, cur in zip(positions, positions[1:]))

    # Pull-back tail that undoes the 20px overshoot.
    tracks += [-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1]
    return tracks
3.5 最后也就是滑动滑块到缺口位置
def move_to_gap(self, track):
    """
    Drag the slider button along the given track.

    Presses and holds the ``geetest_slider_button`` element, applies each
    horizontal offset in turn with a 0.02s pause after every step, then
    releases the mouse button.

    The track is only iterated; unlike the ``track.pop(0)`` loop it replaces,
    the caller's list is left intact.

    :param track: iterable of horizontal offsets in pixels
    """
    slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    ActionChains(self.browser).click_and_hold(slider).perform()
    for x in track:
        ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.02)
    ActionChains(self.browser).release().perform()
贴出完整代码(注意:selenium的有些方法会被极验检测到,所以使用直接执行js命令的方式来达到效果)
crack.py
# -*-coding:utf-8 -*-
import base64
import time
import functools
import numpy as np
from tools.selenium_spider import SeleniumSpider
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import PIL.Image as image
from PIL import ImageChops, PngImagePlugin
from io import BytesIO


class Crack(object):
    """
    Solve the third-generation Geetest slider captcha.
    """

    def __init__(self):
        """Start the browser and build the binarization lookup table."""
        self.url = 'https://www.geetest.com'
        self.browser = SeleniumSpider(path="/personalwork/personal_tools_project/adbtools/chromedriver",
                                      max_window=True)
        self.wait = WebDriverWait(self.browser, 100)
        # Subtracted from the detected gap position before a track is built.
        self.BORDER = 8
        # Lookup table for Image.point(): values below 50 map to 0, all
        # others to 1.
        self.table = [0 if i < 50 else 1 for i in range(256)]

    def open(self):
        """
        Open the Geetest demo page and trigger the captcha.

        Loads the site, navigates to ``/Sensebot/``, clicks the second
        ``li`` of the demo selector and then the ``geetest_radar_tip``
        element. All clicks are issued as JavaScript.
        """
        self.browser.get(self.url)
        self.browser.get(self.url + "/Sensebot/")
        self.browser.web_driver_wait_ruishu(10, "class", 'experience--area')
        time.sleep(1)
        self.browser.execute_js('document.getElementsByClassName("experience--area")[0].getElementsByTagName("div")'
                                '[2].getElementsByTagName("ul")[0].getElementsByTagName("li")[1].click()')
        time.sleep(1)
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_radar_tip')
        self.browser.execute_js('document.getElementsByClassName("geetest_radar_tip")[0].click()')

    def check_status(self):
        """
        Check whether the slider captcha still has to be solved.

        :return: ``False`` when the success tip reads "验证成功", ``True``
            otherwise (including when the tip text cannot be read)
        """
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_success_radar_tip_content')
        try:
            time.sleep(0.5)
            message = self.browser.find_element_by_class_name("geetest_success_radar_tip_content").text
        except Exception:
            # Best effort: an unreadable tip is treated as "not verified yet".
            return True
        return message != "验证成功"

    def get_images(self):
        """
        Fetch both captcha canvases as base64 PNG data URLs.

        The original code stored the ``geetest_canvas_bg`` data in a variable
        named ``fullgb`` and the ``geetest_canvas_fullbg`` data in ``bg``, so
        the returned pair was the opposite of what the names (and the caller
        in ``crack``) expected. The values are now returned in the order the
        names promise.

        :return: tuple ``(bg, fullbg)`` where ``bg`` is the data URL of the
            ``geetest_canvas_bg`` canvas and ``fullbg`` is the data URL of
            the ``geetest_canvas_fullbg`` canvas
        """
        time.sleep(1)
        self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')
        bg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_bg geetest_'
                                     'absolute")[0].toDataURL("image/png")')["value"]
        fullbg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
                                         ' geetest_absolute")[0].toDataURL("image/png")')["value"]
        return bg, fullbg

    def get_decode_image(self, filename, location_list):
        """
        Decode a base64 data URL into an image object.

        :param filename: not used by this method
        :param location_list: data URL string; it is split on ``","`` and
            must contain exactly one comma
        :return: the image opened from the decoded bytes
        """
        _, img = location_list.split(",")
        img = base64.decodebytes(img.encode())
        new_im: PngImagePlugin.PngImageFile = image.open(BytesIO(img))
        return new_im

    def compute_gap(self, img1, img2):
        """
        Locate the x coordinate of the gap by diffing the two captcha images.

        Both images are converted to RGB, their difference is converted to
        grayscale and binarized through ``self.table``; columns are scanned
        from x = 43 and the first one with more than 5 set pixels wins.

        :param img1: first image
        :param img2: second image
        :return: x coordinate of the first matching column, or ``None`` when
            no column qualifies
        """
        img1 = img1.convert("RGB")
        img2 = img2.convert("RGB")
        diff = ImageChops.difference(img1, img2)
        diff = diff.convert("L")
        diff = diff.point(self.table, '1')

        # NOTE(review): presumably skips the area covered by the slider piece
        # at its starting position - confirm against the captcha layout.
        left = 43

        # Obtain the pixel accessor once instead of once per pixel.
        pixels = diff.load()
        width, height = diff.size
        for w in range(left, width):
            hits = 0
            for h in range(height):
                if pixels[w, h] == 1:
                    hits += 1
                    if hits > 5:
                        return w
        return None

    def ease_out_quad(self, x):
        """Quadratic ease-out curve: ``1 - (1 - x) ** 2``."""
        return 1 - (1 - x) * (1 - x)

    def ease_out_quart(self, x):
        """Quartic ease-out curve: ``1 - (1 - x) ** 4``."""
        return 1 - pow(1 - x, 4)

    def ease_out_expo(self, x):
        """Exponential ease-out curve: ``1 - 2 ** (-10 * x)``, pinned to 1 at ``x == 1``."""
        if x == 1:
            return 1
        return 1 - pow(2, -10 * x)

    def get_tracks_2(self, distance, seconds, ease_func):
        """
        Build a slide track by sampling an easing function.

        The target is ``distance + 20`` so the slider first overshoots the
        gap; a fixed tail of small negative steps (summing to -20) then
        pulls it back.
        Reference: https://www.jianshu.com/p/3f968958af5a (the original
        author reports a success rate above 90% with this approach).

        :param distance: gap position
        :param seconds: duration; the curve is sampled every 0.1 over
            ``[0, seconds)``
        :param ease_func: easing function, called with ``t / seconds``
        :return: list of per-step x offsets
        """
        distance += 20
        tracks = [0]
        offsets = [0]
        for t in np.arange(0.0, seconds, 0.1):
            offset = round(ease_func(t / seconds) * distance)
            tracks.append(offset - offsets[-1])
            offsets.append(offset)
        tracks.extend([-3, -2, -3, -2, -2, -2, -2, -1, 0, -1, -1, -1])
        return tracks

    def get_track(self, distance):
        """
        Build a slide track from a simple accelerate/decelerate model.

        According to the original author this variant does not work against
        Geetest (success rate close to 0); it is kept for reference.

        :param distance: offset to travel
        :return: list of per-step x offsets
        """
        distance += 20
        track = []
        # Distance travelled so far.
        current = 0
        # Switch from acceleration to deceleration after 3/5 of the way.
        mid = distance * 3 / 5
        # Time step.
        t = 0.5
        # Current velocity.
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a * t
            v = v0 + a * t
            # x = v0 * t + 1/2 * a * t^2
            move = v0 * t + 0.5 * a * (t ** 2)
            current += move
            track.append(round(move))
        track.extend([-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1])
        return track

    def move_to_gap(self, track):
        """
        Drag the slider button along the given track.

        The track is only iterated, so the caller's list is left intact
        (the previous ``track.pop(0)`` loop emptied it).

        :param track: iterable of horizontal offsets in pixels
        """
        slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
            time.sleep(0.02)
        ActionChains(self.browser).release().perform()

    def crack(self, n):
        """
        Run one full attempt: open the page, locate the gap, drag the slider.

        On failure both decoded images are saved as ``bg_img{n}.png`` and
        ``fullbg{n}.png`` for debugging.

        :param n: attempt index, used only in the debug image file names
        :return: ``True`` when verification succeeded, ``False`` otherwise
        """
        self.open()
        if not self.check_status():
            print("验证成功")
            return True

        bg_filename = 'bg.png'
        fullbg_filename = 'fullbg.png'
        bg_location_base64, fullbg_location_64 = self.get_images()
        bg_img = self.get_decode_image(bg_filename, bg_location_base64)
        fullbg_img = self.get_decode_image(fullbg_filename, fullbg_location_64)

        gap = self.compute_gap(fullbg_img, bg_img)
        print('缺口位置', gap)
        # compute_gap returns None when no gap column is found; skip the drag
        # instead of failing on ``None - self.BORDER``.
        if gap is not None:
            track = self.get_tracks_2(gap - self.BORDER, 1, self.ease_out_quart)
            print("滑动轨迹", track)
            print("滑动距离", sum(track))
            self.move_to_gap(track)
            time.sleep(1)
            if not self.check_status():
                print('验证成功')
                return True

        print('验证失败')
        # Keep the images of the failed attempt for debugging.
        bg_img.save(f"bg_img{n}.png")
        fullbg_img.save(f"fullbg{n}.png")
        return False


if __name__ == '__main__':
    print('开始验证')
    crack = Crack()
    attempts = 200
    count = 0
    for i in range(attempts):
        if crack.crack(i):
            count += 1
    print(f"成功率:{count / attempts * 100}%")
selenium_spider.py
#!/usr/local/bin/python
# coding:utf-8

"""
@author: Liubing
@software: PyCharm
@file: selenium_spider.py
@time: 2019-03-11 13:46
@describe: A thin wrapper on top of selenium. Chrome only; other browsers need their own wrapper.
"""
import json
import time as time_
from lxml import etree
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.chrome.options import Options


class SeleniumSpider(WebDriver):
    """Chrome WebDriver with extra helpers for cookies, waiting and DevTools commands."""

    def __init__(self, path, params=None, max_window=False, *args, **kwargs):
        """
        Start Chrome with the options used by this project.

        :param path: str, path to the chromedriver executable
        :param params: list of additional Chrome command-line arguments
        :param max_window: maximize the window after start-up when true
        :param args: forwarded to ``WebDriver.__init__``
        :param kwargs: forwarded to ``WebDriver.__init__``
        """
        self.__path = path
        self.__params = params
        self.__options = Options()
        self.__options.add_argument('--dns-prefetch-disable')
        # Per the original author, Google's documentation recommends this
        # flag to work around a bug.
        self.__options.add_argument('--disable-gpu')
        # Hide the "Chrome is being controlled by automated test software" bar.
        self.__options.add_argument('disable-infobars')
        self.is_maximize_window = max_window
        # Anti-detection measure, see https://juejin.im/post/5c62b6d5f265da2dab17ae3c
        self.__options.add_experimental_option('excludeSwitches', ['enable-automation'])
        if params:
            for i in params:
                self.__options.add_argument(i)
        super(SeleniumSpider, self).__init__(executable_path=self.__path, options=self.__options, *args, **kwargs)
        if self.is_maximize_window:
            self.maximize_window()
        # Locator strategy names.
        self.ID = "id"
        self.XPATH = "xpath"
        self.LINK_TEXT = "link text"
        self.PARTIAL_LINK_TEXT = "partial link text"
        self.NAME = "name"
        self.TAG_NAME = "tag name"
        self.CLASS_NAME = "class name"
        self.CSS_SELECTOR = "css selector"

    def cookies_dict_to_selenium_cookies(self, cookies: dict, domain):
        """
        Convert a ``{name: value}`` cookie dict into selenium cookie dicts.

        :param cookies: mapping of cookie name to value
        :param domain: domain assigned to every generated cookie
        :return: list of ``{"name": ..., "value": ..., "domain": ...}`` dicts
        """
        # The domain is attached explicitly; the original author notes that
        # requests does not separate cookie domains clearly enough.
        return [{"name": key, "value": value, "domain": domain} for key, value in cookies.items()]

    def get(self, url: str, cookies=None, domain=None):
        """
        Load a URL and optionally install cookies.

        :param url: URL to load
        :param cookies: either a list of ``{"name", "value", "domain"}``
            dicts or a plain ``{name: value}`` dict
        :param domain: cookie domain, required when ``cookies`` is a dict
        :raises TypeError: a list entry lacks one of the required keys
        :raises ValueError: ``cookies`` is a dict and ``domain`` is missing
        """
        super().get(url)
        if cookies:
            if isinstance(cookies, list):
                for cookie in cookies:
                    if "name" in cookie and "value" in cookie and "domain" in cookie:
                        self.add_cookie(cookie)
                    else:
                        raise TypeError('cookies错误请传入正确格式[{"name": key, "value": value, "domain": domain},...'
                                        '] 或者{key: vale,...}')
            elif isinstance(cookies, dict):
                if domain:
                    for i in self.cookies_dict_to_selenium_cookies(cookies, domain):
                        self.add_cookie(i)
                else:
                    raise ValueError("{key:vale}格式必须传入doamin参数")
            # Reload the page after the cookies were handled.
            self.refresh()

    def web_driver_wait(self, time: int, rule: str, num: str):
        """
        Wait for an element with selenium's ``WebDriverWait``.

        The original author advises against this method on sites protected
        by the "ruishu" product (it leads to 400 errors there).

        :param time: timeout in seconds
        :param rule: locator strategy [id, xpath, link text, partial link
            text, name, tag name, class name, css selector]
        :param num: locator value
        """
        WebDriverWait(self, time, 0.5).until(
            EC.presence_of_element_located((rule, num)))

    def web_driver_wait_ruishu(self, time: int, rule: str, num: str):
        """
        Wait for an element by polling the page HTML once per second.

        The document's ``outerHTML`` is fetched through ``execute_js`` and
        searched with XPath for any element whose ``rule`` attribute contains
        ``num``.

        A failed parse now counts as an ordinary miss. Previously the
        ``continue`` in the exception handler skipped both the sleep and the
        countdown, so a persistent failure looped forever without pausing.

        :param time: maximum number of polls (about one second apart)
        :param rule: attribute name to match [id, class]
        :param num: substring the attribute must contain
        :raises Exception: the element did not appear in time
        """
        while time:
            response = self.execute_js("document.documentElement.outerHTML")
            try:
                html = etree.HTML(text=response["value"])
                inp = html.xpath("//*[contains(@%s, '%s')]" % (rule, num))
            except Exception:
                inp = None
            if inp:
                break
            time_.sleep(1)
            time -= 1
        if not time:
            raise Exception("未找到 %s" % num)

    def _send_chrome_command(self, cmd, params):
        """POST a DevTools command to chromedriver and return the raw response."""
        resource = "/session/%s/chromium/send_command_and_get_result" % self.session_id
        command_executor = self.command_executor
        url = command_executor._url + resource
        body = json.dumps({'cmd': cmd, 'params': params})
        return command_executor._request('POST', url, body)

    def execute_chrome_protocol_js(self, protocol, params: dict):
        """
        Run a Chrome DevTools Protocol command.

        See https://chromedevtools.github.io/devtools-protocol/

        :param protocol: str, command name
        :param params: dict, command parameters
        :return: the whole response when its ``status`` is truthy, otherwise
            the response's ``value``
        """
        response = self._send_chrome_command(protocol, params)
        if response['status']:
            return response
        return response["value"]

    def execute_js(self, js):
        """
        Evaluate JavaScript through the DevTools ``Runtime.evaluate`` command.

        Used instead of selenium's own script execution to get past "ruishu"
        detection, per the original author.

        :param js: str, JavaScript expression to evaluate
        :return: the whole response when its ``status`` is truthy, otherwise
            the evaluation result ``{"type": "xxx", "value": "xxx"}``
        """
        response = self._send_chrome_command("Runtime.evaluate", {"expression": js})
        if response['status']:
            return response
        return response["value"]["result"]
问题:Windows下滑动卡顿导致验证不成功的解决办法
修改源码文件 site-packages\selenium\webdriver\common\actions\pointer_input.py
将其中移动耗时的默认值(DEFAULT_MOVE_DURATION = 250,单位毫秒)改为30或者其他值就可以(不能改得太小,否则滑动太快)
image.png
最后以极验官网demo为例进行了200次测试,成功率高达99%。